# Layer whose output is compared against the base image's activations
# when the first (squared-difference) loss term is built.
base_layer = 'block4_conv4'

# Layers that each contribute one style_loss term to the total loss.
style_layers = [
    'block1_pool',
    'block2_pool',
]

def gram_matrix(x):
    """Return the Gram matrix of the tensor ``x``.

    The axes of ``x`` are reordered with the permutation ``(2, 0, 1)`` so
    that the last axis comes first, and the result is flattened into a 2-D
    matrix ``F`` with one row per entry of that leading axis. The returned
    value is the product ``F . F^T``.

    The permutation pattern requires ``x`` to have exactly three axes;
    presumably these are (rows, cols, channels) -- confirm against callers.
    """
    last_axis_first = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(last_axis_first)
    return K.dot(flat, K.transpose(flat))

def style_loss(style, combination):
    """Return the scaled squared distance between two Gram matrices.

    Both ``style`` and ``combination`` are passed through ``gram_matrix``;
    the element-wise squared difference of the results is summed and then
    divided by ``4 * channels**2 * size**2``.

    NOTE(review): the normalisation uses fixed values (``channels = 3`` and
    ``size = WIDTH * HEIGHT``) rather than the dimensions of the feature
    maps actually passed in -- confirm this is intended.
    """
    channels = 3
    size = WIDTH * HEIGHT
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    # Same multiplication order as before so the float result is unchanged.
    denominator = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / denominator

# Total loss, built up term by term starting from a zero-valued variable.
loss = K.variable(0.)

# Sub-model ending at `base_layer`; running it on the base image yields the
# concrete activations that the first loss term compares against.
intermediate_layer = Model(model.input, model.get_layer(base_layer).output)
activation_intermediate_layer = intermediate_layer.predict(base_img)

# Output tensor of the same layer inside the full model.
activation = model.get_layer(base_layer).output

# Divide the summed squared difference by the number of elements in the
# activation tensor (product of its runtime shape).
scaling = K.prod(K.cast(K.shape(activation), 'float32'))
# NOTE: the subtraction must be an ASCII '-'; a typographic dash here is a
# SyntaxError and prevents the whole file from being parsed.
loss = loss + K.sum(K.square(activation -
                             activation_intermediate_layer)) / scaling

# Add one style_loss term per layer listed in `style_layers`.
for layer_name in style_layers:
    activation = model.get_layer(layer_name).output
    # Backend function that evaluates this layer's output for a given input.
    style_layer_output = K.function([model.input], [activation])
    # The style image is reshaped to a batch of one and divided by 255
    # before being fed to the network.
    layer_output_style = style_layer_output([style_img.reshape(
                                                   1, WIDTH, HEIGHT, 3) / 255])
    # First [0] selects the only requested output; second [0] drops the
    # leading batch axis (size 1, from the reshape above).
    layer_output_style = layer_output_style[0][0]
    # activation[0] likewise drops the batch axis of the layer's output.
    loss = loss + style_loss(layer_output_style, activation[0])
